package mitlab.seg.ner.corpus.io;

import java.io.File;
import java.io.FileFilter;

/**
 * 文本读取工具
 */
public class EasyReader {
  /**
   * 根目录
   */
  String root;
  /**
   * 是否输出进度
   */
  boolean verbose = true;

  /**
   * 构造
   * 
   * @param root 根目录
   */
  public EasyReader(String root) {
    this.root = root;
  }

  /**
   * 构造
   * 
   * @param root 根目录
   * @param verbose 是否输出进度
   */
  public EasyReader(String root, boolean verbose) {
    this.root = root;
    this.verbose = verbose;
  }

  /**
   * 读取
   * 
   * @param handler 处理逻辑
   * @param size 读取多少个文件
   * @throws Exception
   */
  public void read(LineHandler handler, int size) throws Exception {
    File rootFile = new File(root);
    File[] files;
    if (rootFile.isDirectory()) {
      files = rootFile.listFiles(new FileFilter() {
        @Override
        public boolean accept(File pathname) {
          return pathname.isFile() && !pathname.getName().endsWith(".bin");
        }
      });
      if (files == null) {
        if (rootFile.isFile())
          files = new File[] {rootFile};
        else
          return;
      }
    } else {
      files = new File[] {rootFile};
    }

    int n = 0;
    int totalAddress = 0;
    long start = System.currentTimeMillis();
    for (File file : files) {
      if (size-- == 0)
        break;
      if (file.isDirectory())
        continue;
      if (verbose)
        System.out.printf("正在处理%s, %d / %d\n", file.getName(), ++n, files.length);
      IOUtil.LineIterator lineIterator = new IOUtil.LineIterator(file.getAbsolutePath());
      while (lineIterator.hasNext()) {
        ++totalAddress;
        String line = lineIterator.next();
        if (line.length() == 0)
          continue;
        handler.handle(line);
      }
    }
    handler.done();
    if (verbose)
      System.out.printf("处理了 %.2f 万行，花费了 %.2f min\n", totalAddress / 10000.0,
          (System.currentTimeMillis() - start) / 1000.0 / 60.0);
  }

  /**
   * 读取
   * 
   * @param handler 处理逻辑
   * @throws Exception
   */
  public void read(LineHandler handler) throws Exception {
    read(handler, Integer.MAX_VALUE);
  }
}
